/*

Copyright (C) 1997,1998,1999,2000,2001  Franz Josef Och

mkcls - a program for making word classes .

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.

*/






#ifndef KATEG_OPT_H
#define KATEG_OPT_H
#include <string>

#include <cstdlib>
#include "Problem.h"

extern double rhoLo;

typedef int Kategory;
typedef int Word;



#ifdef FREQTYPE_DOUBLE
typedef double FreqType;
#else
typedef int FreqType;
#endif


#include "KategProblemWBC.h"


#include "KategProblemKBC.h"


enum {
  INIT_RAN=1,
  INIT_AIO=2,
  INIT_LWRW=3,
  INIT_FREQ=4,
  INIT_OTHER=5
};


enum {
  W_RAN=(8|16),
  W_DET_DECR=(16),
  W_DET_INCR =(32)
};
#define CHOOSE_WORD (8|16|32)


enum {
  K_DET=(64),
  K_RAN=(128),
  K_BEST=(64|128)
};
#define CHOOSE_KAT (64|128)


enum {
  CRITERION_ML=0,
  CRITERION_LO=1,
  CRITERION_MY=2
};



class NWG
{
private:
  Array<FreqType> freq;

  Array<int> timeOfFreq;




  int curTime;
public:
  NWG(int n);
  void init();

  int anzNot0;


  Array<int> not0;

  int word;

  inline void addFreq(int C,FreqType n);

  void sort();

  FreqType getFreq(int i) {
    if( timeOfFreq[i]==curTime )
      return freq[i];
    else
      return 0;
  };
};

inline void NWG::addFreq(int g,FreqType n)
{
  if(timeOfFreq[g]==curTime)
    freq[g]+=n;
  else {
    timeOfFreq[g]=curTime;
    freq[g]=n;
    not0[anzNot0++]=g;
  }
}



struct KategProblemChange : public ProblemChange {
  void *operator new(size_t size);
  void operator delete(void *ptr,size_t size);

  int word;
  int toKat;
  int fromKat;
};

class KategProblem : public Problem
{
private:
  double kat_h_full(int n);
  double kat_h_full(double n);
  double kat_h_part(int n);
  double kat_h_part(double n);
  double sigmaVerfaelschung;
  short katWasEmpty;



  int nwgWord;

  NWG nwg;
  NWG ngw;
  FreqType nww;

  int ursprung,ziel;

  Array<int> _katOfWord;

  int _maxComp,_maxCompVal;

  double nmo_my(int i,int j);
  double nmo(int i,int j);


  double nmo_lo(int i,int j,int &e0,int &e1);


  void putWord(int word,int to);


  void fastPutWord(int word,int to);


  void setKatOfWord(int w,int k) {
    if( !(wordFreq.fixedWord[w]==k||wordFreq.fixedWord[w]==-1||k==-1) ) {
      cout << "mkcls::setKatOfWord::ERROR: " << w << " " << k << " " << wordFreq.fixedWord[w] << " " << (*words)[w] << endl;
    }
    _katOfWord[w]=k;
    nwgWord=-1;
  };


  void fillNWG(int w);


  inline FreqType nstrich(int i,int j);


  void vnstrich(int i,int j);



protected:
  virtual int _change(ProblemChange **p);


  virtual void _doChange(ProblemChange &c);


  virtual void _undoChange(ProblemChange &c);


  virtual double _value();


  double _valueChange(KategProblemChange &k);


  virtual void incrementDirection();


  virtual int maxDimensionVal(void) ;


  virtual int maxDimension(void) ;


public:
  leda_array<string> *words;
  typedef leda_set<int> intSet;

  leda_array<intSet> *kats;

  KategProblemWBC wordFreq;
  KategProblemKBC katFreq;

  Array<int> initLike;

  KategProblem(int aw,int mak,int _initialisierung,int _auswertung,
               int _nachbarschaft,int minw=0);


  virtual ~KategProblem();


  virtual void _initialize(int initTyp);
  virtual void _initialize(int initTyp,int specialFixedWord);


  virtual double valueChange(ProblemChange&c);


  virtual Problem *makeEqualProblem();


  virtual double nicevalue(double value=1e100);


  void makeKats();


  virtual void dumpOn(ostream &strm);


  virtual void dumpInfos(ostream &strm);





  inline void katwahl(int k);


  inline void wortwahl(int w);





  inline int katOfWord(int w);


  inline short wortwahl();


  inline short katwahl() ;


  virtual int maxNonBetterIterations();


  virtual int expectedNumberOfIterations();


  const char *getString(int i);
  string getTheString(int i);


  void makeTitle(char x[512]);


  void fixInitLike();

};

inline int KategProblem::katOfWord(int w)
{
  return _katOfWord[w];
};
inline short KategProblem::wortwahl()
{
  return nachbarschaft&CHOOSE_WORD;
};
inline short KategProblem::katwahl()
{
  return nachbarschaft&CHOOSE_KAT;
};

inline void KategProblem::katwahl(int k)
{
  nachbarschaft = (nachbarschaft&(~CHOOSE_KAT)) | k;
  if(k==K_BEST)
    _maxCompVal=1;
  else
    _maxCompVal=katFreq.nKats-2;
};

inline void KategProblem::wortwahl(int w)
{
  nachbarschaft = (nachbarschaft&(~CHOOSE_WORD)) | w;
};



inline FreqType KategProblem::nstrich(int i,int j)
{
  FreqType n=0;

  if( i==ursprung )
    n-=nwg.getFreq(j);
  if( i==ziel )
    n+=nwg.getFreq(j);

  if( j==ursprung )
    n-=ngw.getFreq(i);
  if( j==ziel )
    n+=ngw.getFreq(i);

  if( i==ursprung && j==ursprung )
    n+=nww;
  if( i==ziel && j==ziel )
    n+=nww;

  if( i==ursprung && j==ziel )
    n-=nww;
  if( i==ziel && j==ursprung )
    n-=nww;

  return n;
}





#define MAX_H_TABLE 4000
extern double h_table[],l_table[],hmy_table[],hmy_sigma;


inline double kat_mlog(double x)
{
  if(x<=1e-9)
    return 0;
  else
    return log(x);
}


inline double kat_mlog(int s)
{
  if(s<=0)
    return 0;
  else if( s<MAX_H_TABLE ) {
    massert( s==0 || l_table[s]==log(s) );
    return l_table[s];
  } else
    return log((double)(s));
}



inline double kat_hlo(int n)
{
  return n*kat_mlog(n-1);
}

inline double kat_hlo(double n)
{
  return n*kat_mlog(n-1);
}


inline double kat_h(int n)
{
  massert(n>=-1);
  if(n<=0)
    return 0;
  else if(n<MAX_H_TABLE) {
    massert(n==0||fabs(h_table[n]-n*log((double)n))<1e-8);
    return h_table[n];
  } else
    return n*log((double)(n));
}
inline double kat_h(double n)
{
  if(n<=1e-9)
    return 0;
  else
    return n*log(n);
}


inline double kat_etaFkt(int _e0,int e1,int immer0,int cats)
{
  int e0    = _e0 - immer0;
  int ePlus = cats*cats - _e0;
  if(cats*cats-e0>1)
    return e1*log( (ePlus-1.0)/(e0+1.0)*rhoLo );
  else
    return 0;
}

double mkat_h_full(int n,double tf);
double mkat_h_part(int n,double cf);

int Hash(const string& s);


#endif

